'''
Created on Nov 1, 2011

@author: oabalbin
'''

def _split_cds_mutation(cds):
    '''
    Split a CDS mutation string into a (position, change) pair.

    * If the string contains '>', the last four characters are returned as
      the change and everything before them as the position.
    * Otherwise, if it contains 'del', it is split at the first 'del'; the
      change part starts with 'del'.
    * Otherwise the whole string is returned for both parts.
    '''
    if '>' in cds:
        # NOTE(review): the fixed four-character tail is kept from the
        # original code; it presumably matches the exact layout of the input
        # field (e.g. a trailing quote) -- confirm against real data.
        return cds[:-4], cds[-4:]
    delt=cds.find('del')
    if delt>=0:
        return cds[:delt], cds[delt:]
    return cds, cds


def read_mutated_genes(file,ofile):
    '''
    Read file with cosmic mutated positions in HCC2218/HCC1599 cell lines
    and write a reformatted tab-separated copy.

    file  -- path of the tab-separated input file. Lines starting with "#"
             and empty lines are skipped; every other line must have at
             least seven columns.
    ofile -- path of the output file (created or overwritten).

    The output starts with a header line and then has one line per input
    row with the columns: input column 0, input column 1, CDS position,
    CDS change, input column 2, input columns 4, 5 and 6. The CDS position
    and change are obtained by splitting input column 3 (see
    _split_cds_mutation).

    Each parsed input row is also printed to stdout, as before.

    Raises IOError/OSError if a file cannot be opened and IndexError if a
    data row has fewer than seven columns.
    '''
    header=["Sample","geneName","CDS_Position","Mutation","AA_Mutation", "Somatic_Status","Zygosity","Verified"]
    cdsf=3  # index of the CDS mutation column in the input
    # Context managers guarantee both files are closed even if a row fails.
    with open(file) as ifile, open(ofile,'w') as out:
        out.write('\t'.join(header)+'\n')
        for l in ifile:
            if l.startswith("#"):
                continue
            if not l.strip('\r\n'):
                # An empty line has no columns to index; skip it instead of
                # failing with IndexError.
                continue
            f=l.strip('\n').split('\t')
            # The deletion split used to take its length from f[2] (the
            # amino-acid column) instead of the CDS column; both parts are
            # now derived from the CDS column only.
            cd, ch=_split_cds_mutation(f[cdsf])
            row=[f[0],f[1],cd, ch, f[2],f[4],f[5],f[6]]
            print(f)
            # Join directly with tabs so commas inside a field are preserved
            # rather than being turned into extra column separators.
            out.write('\t'.join(row)+'\n')
        
def read_notmutated_genes(file,ofile,first_col=1):
    '''
    Flatten a tab-separated table of gene names into one name per line.

    file      -- path of the tab-separated input file. Lines starting with
                 "#" are skipped.
    ofile     -- path of the output file (created or overwritten).
    first_col -- index of the first column to read on each line. The
                 default of 1 skips the first column, which the original
                 code noted is right for the HCC1599 file but not for the
                 HCC2218 one; pass a different index for other layouts.

    Every non-empty field from first_col onwards is written on its own line,
    in input order, with leading and trailing double quotes removed.

    Each parsed input row is also printed to stdout, as before.

    Raises IOError/OSError if a file cannot be opened.
    '''
    # Context managers guarantee both files are closed even if a row fails.
    with open(file) as ifile, open(ofile,'w') as out:
        for l in ifile:
            if l.startswith("#"):
                continue
            fields=l.strip('\n').split('\t')
            print(fields)
            for f in fields[first_col:]:
                if f:
                    out.write(f.strip('"')+'\n')
'''    
file="/home/oabalbin/Desktop/HCC2218/HCC2218_genes_with_mutations_full.txt"
ofile="/home/oabalbin/Desktop/HCC2218/HCC2218_genes_with_mutations2.txt"
file2='/home/oabalbin/Desktop/HCC2218/HCC2218_genes_withNot_mutations.txt'
ofile2='/home/oabalbin/Desktop/HCC2218/HCC2218_genes_withNot_mutations2.txt'
'''
# Input/output paths for the HCC1599 cell line. The corresponding HCC2218
# paths are kept in the inert triple-quoted string immediately above.
# NOTE(review): `file` shadows the Python 2 builtin of the same name.
file="/home/oabalbin/Desktop/HCC1599/HCC1599_genes_with_mutations.txt"
ofile="/home/oabalbin/Desktop/HCC1599/HCC1599_genes_with_mutations2.txt"
# NOTE(review): "muations" in the next path may be a typo or may match the
# actual file name on disk -- confirm before changing it.
file2='/home/oabalbin/Desktop/HCC1599/HCC1599_Genes_withNot_muations.txt'
ofile2='/home/oabalbin/Desktop/HCC1599/HCC1599_Genes_withNot_mutations2.txt'



# These calls run whenever the module is executed or imported (there is no
# __main__ guard), reading the input files above and writing the outputs.
read_mutated_genes(file,ofile)
read_notmutated_genes(file2,ofile2)

